https://github.com/jamesha95/IHAD-mapping.git

Background

In 2019, the ABS released the experimental Index of Household Advantage and Disadvantage. This variable assigns a socio-economic score to each eligible household in the 2016 Census, and then allocates the households to a quartile (not population-weighted).

This variable is not yet available in TableBuilder for cross-classification (as of March 25th, 2019). This is unfortunate; it would be very helpful to be able to investigate the relationship between socio-economic disadvantage and other characteristics.

So far, the most disaggregated information available is only the percentage of households in each quartile, at the SA1 level. In this script, we will map the proportion of households in the bottom quartile for each SA1, and then aggregate to SA2 (weighting by household, and then by population to see if there’s much difference). We may then further compare to SEIFA indices.

IHAD may end up being nothing more than a slightly more nuanced version of SEIFA, where statistical areas are described not by one score but by four (that is, the proportion of households in each IHAD quartile). But hopefully it becomes available in TableBuilder in the near future to allow easy analysis of socio-economic disadvantage without the ecological validity issues of SEIFA.

To begin, we import the data from the ABS website. We’ve named the file sa1-ihad-percentages.xls and stored it in the folder named data.

# Packages first

library(devtools)
#install_github("wfmackey/absmapsdata")

library(tidyverse)
library(sf)
library(absmapsdata)
library(grattantheme)
library(sp)
library(data.table)
library(ASGS)
library(tmap)
library(leaflet)

# Now we read in the data from the website

download.file(url = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&statistical%20area%20level%201,%20percentage%20of%20households,%20ihad%202016.xls&4198.0&Data%20Cubes&DE8CAFA6829E80E6CA2583AD00106126&0&2016&26.02.2019&Latest",destfile = "ihad-percentages-sa1.xls")

data <- readxl::read_xls(path = "ihad-percentages-sa1.xls", 
                         sheet = "Table 1", 
                         skip = 5, 
                         col_names = TRUE)

# Now we tidy the column names 

data <- data %>%
  
# We note that column one is the 7-digit SA1 code, which is unhelpful and will be removed
  
  select(-...1) %>%
  
# And now we give sensible names to the remaining columns
  
  rename(sa1_main_2016 = ...2,
         percentage_q1 = `Quartile 1`,
         percentage_q2 = `Quartile 2`,
         percentage_q3 = `Quartile 3`,
         percentage_q4 = `Quartile 4`,
         dwellings_in_scope = ...7,
         SEIFA_quartile = ...8,
         population = ...9, 
         dwellings_total = ...10)

Combining with map data

We now append this data to the simple features ABS map data synthesised by Will Mackey.

We’ve wrangled, now let us plot!

map_sa1_melb <- 
  ggplot(data = ihad_sa1_melb) +
  geom_sf(aes(geometry = geometry,
              fill = percentage_q1),
          lwd = 0) +
  scale_fill_gradientn(colours = grattan_pal(6)) +
  theme_grattan() +
  theme(legend.position = "right",
        legend.direction = "vertical") +
  coord_sf(datum = NA) +
  labs(title = "Disadvantage across Greater Melbourne", 
       subtitle = "Percentage of households in the bottom IHAD quartile",
       fill = "Percentage")

map_sa1_syd <- 
  ggplot(data = ihad_sa1_syd) +
  geom_sf(aes(geometry = geometry,
              fill = percentage_q1),
          lwd = 0) +
  scale_fill_gradientn(colours = grattan_pal(6)) +
  theme_grattan() +
  theme(legend.position = "right",
        legend.direction = "vertical") +
  coord_sf(datum = NA) +
  labs(title = "Disadvantage across Greater Sydney", 
       subtitle = "Percentage of households in the bottom IHAD quartile",
       fill = "Percentage")  

map_sa1_melb

map_sa1_syd

Aggregating the data to SA2s or SA3s

Now that we’ve mapped the SA1s, we can see that several areas are unpopulated. We will now aggregate to the SA2 level, and try to increase interactivity in the map (hover to see the SA2 name).

The proportion of households facing disadvantage (i.e. in the lowest quartile) can be determined by summing the number of disadvantaged households in each SA2 and dividing by the total number of in-scope households.

# We need to correspond the sa1 data to sa2s, then append to sa2 geometry.

corresp_sa1_to_sa2 <- tibble(sa1_main_2016 = sa12016$sa1_main_2016, 
                             sa2_main_2016 = sa12016$sa2_main_2016)

ihad_agg <- data %>% 
  full_join(corresp_sa1_to_sa2, by = "sa1_main_2016") %>% 
  mutate(n_q1 = percentage_q1*dwellings_in_scope/100,
         n_q2 = percentage_q2*dwellings_in_scope/100,
         n_q3 = percentage_q3*dwellings_in_scope/100,
         n_q4 = percentage_q4*dwellings_in_scope/100) %>% 
  group_by(sa2_main_2016) %>%
  summarise(n_q1 = sum(n_q1, na.rm = TRUE),
            n_q2 = sum(n_q2, na.rm = TRUE),
            n_q3 = sum(n_q3, na.rm = TRUE),
            n_q4 = sum(n_q4, na.rm = TRUE),
            dwellings_in_scope = sum(dwellings_in_scope, na.rm = TRUE),
            dwellings_total = sum(dwellings_total, na.rm = TRUE),
            population = sum(population, na.rm = TRUE)) %>% 
  mutate(percentage_q1 = 100*n_q1/dwellings_in_scope,
         approx_number_households = percentage_q1*dwellings_total/100) %>% 
  full_join(sa22016, by = "sa2_main_2016")
  
# One SA2 is a clear outlier - Braeside in Melbourne (100%). It has a low sample size, about 11 households. We exclude those SA2s with more than 60% of households in the bottom quartile.

ihad_sa2_melb <- ihad_agg %>% 
  filter(gcc_name_2016 == "Greater Melbourne")

#ihad_sa2_melb_trim <- ihad_sa2_melb %>%
#  filter(percentage_q1 <= 60)

ihad_sa2_syd <- ihad_agg %>% 
  filter(gcc_name_2016 == "Greater Sydney",
         percentage_q1 <= 60)
# We need to correspond the sa1 data to sa2s, then append to sa2 geometry.

corresp_sa1_to_sa3 <- tibble(sa1_main_2016 = sa12016$sa1_main_2016, 
                             sa3_code_2016 = sa12016$sa3_code_2016)

ihad_agg_sa3 <- data %>% 
  full_join(corresp_sa1_to_sa3, by = "sa1_main_2016") %>% 
  mutate(n_q1 = percentage_q1*dwellings_in_scope,
         n_q2 = percentage_q2*dwellings_in_scope,
         n_q3 = percentage_q3*dwellings_in_scope,
         n_q4 = percentage_q4*dwellings_in_scope) %>% 
  group_by(sa3_code_2016) %>%
  summarise(n_q1 = sum(n_q1, na.rm = TRUE),
            n_q2 = sum(n_q2, na.rm = TRUE),
            n_q3 = sum(n_q3, na.rm = TRUE),
            n_q4 = sum(n_q4, na.rm = TRUE),
            dwellings_in_scope = sum(dwellings_in_scope, na.rm = TRUE),
            dwellings_total = sum(dwellings_total, na.rm = TRUE),
            population = sum(population, na.rm = TRUE)) %>% 
  mutate(percentage_q1 = n_q1/dwellings_in_scope,
         approx_number_households = percentage_q1*dwellings_total/100) %>% 
  full_join(sa32016, by = "sa3_code_2016")
  
#

ihad_sa3_melb <- ihad_agg_sa3 %>% 
  filter(gcc_name_2016 == "Greater Melbourne")

#ihad_sa2_melb_trim <- ihad_sa2_melb %>%
#  filter(percentage_q1 <= 60)

ihad_sa3_syd <- ihad_agg_sa3 %>% 
  filter(gcc_name_2016 == "Greater Sydney",
         percentage_q1 <= 60)

Having wrangled, now we plot.

#I've set `results="show"` because without it, the interactive map won't print

map_sa2_melb <- 
  ggplot(data = ihad_sa2_melb) +
  geom_sf(aes(geometry = geometry,
              fill = percentage_q1),
          lwd = 0) +
  geom_point(aes(x = cent_lat,
                 y = cent_long,
                 size = approx_number_households),
             colour = "black",
             alpha = 0.5) +
  scale_size(range = c(0.1, 1)) +
  scale_fill_gradientn(colours = grattan_pal(6)) +
  theme_grattan() +
  theme(legend.position = "right",
        legend.direction = "vertical",
        axis.title = element_blank()) +
  coord_sf(datum = NA) +
  labs(title = "Disadvantage across Greater Melbourne", 
       subtitle = "Percentage of households in the bottom IHAD quartile",
       fill = "Percentage")
  
map_sa2_melb

# Re-ordering columns so that the label is the name, not the code
ihad_sa2_melb_leaflet <- ihad_sa2_melb %>% 
  select(sa2_name_2016, everything())

# getting the tibble ready for use in tmap
sf_sa2_melb <- st_as_sf(ihad_sa2_melb_leaflet)

tmap_mode("view")

int_map <- tm_shape(sf_sa2_melb) + 
  tm_fill(col = "percentage_q1", 
          palette = grattan_pal(6),
          alpha = 0.5) +
  tm_bubbles(size = "approx_number_households", 
             col = "black", 
             border.col = "white", 
             alpha = 0.5) +
  tm_borders(col = grattan_grey1, lwd = 0.5)

int_map

Next we’ll try making a map with a dot plotted for every 50 or so households in the bottom quartile.

#  Let's try plotting one dot for every 50 households


perNhh <- 50
pointCollector <- list()

ihad_sa2_melb_trim <- ihad_sa2_melb %>%
  filter(percentage_q1 <= 60)

melb_sf <- SA2_2016
temp <- SA2_2016@data %>% as.data.table()
melb_sf@data <- temp[ , SA2_MAIN16 := SA2_MAIN16 %>% as.character()]
melb_sf <- melb_sf[melb_sf$GCC_NAME16 == "Greater Melbourne",]

for(i in ihad_sa2_melb_trim$sa2_main_2016){
  sa2_shape_frame <- melb_sf[melb_sf$SA2_MAIN16 == i, ]
  if(nrow(sa2_shape_frame) < 1){  next()  }
  if(is.na(ihad_sa2_melb_trim$approx_number_households[ihad_sa2_melb_trim$sa2_main_2016 == i])){next()}
  npts <- floor(ihad_sa2_melb_trim$approx_number_households[ihad_sa2_melb_trim$sa2_main_2016 == i]/perNhh)
  if(npts == 0){next()}
  pts <- data.frame(spsample(x = sa2_shape_frame, 
                             n = npts, 
                             type = "random", 
                             iter = 10)@coords) # iter(default = 4): number of times to try to place sample points in a polygon before giving up and returning NULL - this may occur when trying to hit a small and awkwardly shaped polygon in a large bounding box with a small number of points.
  pointCollector[[i]] <- pts
}

pointFrame <- bind_rows(pointCollector, .id = "sa2_main")

And once again, we plot.

map_sa2_melb_dots <- ggplot(ihad_sa2_melb_trim)
map_sa2_melb_dots <- map_sa2_melb_dots + theme_void()
map_sa2_melb_dots <- map_sa2_melb_dots + geom_sf(aes(geometry = geometry, 
                                                     fill = percentage_q1),
                                                 lwd = 0)
map_sa2_melb_dots <- map_sa2_melb_dots + scale_fill_gradientn(colours = grattan_pal(6))
map_sa2_melb_dots <- map_sa2_melb_dots + geom_point(data = pointFrame,
                                                    aes(x = x, y = y),
                                                    shape = ".",
                                                    colour = "BLACK",
                                                    alpha = 1/2)
map_sa2_melb_dots <- map_sa2_melb_dots + theme(legend.position = "right", 
                                               legend.direction = "vertical",
                                               legend.text = element_text(size = 12),
                                               axis.title = element_blank())
map_sa2_melb_dots <- map_sa2_melb_dots + coord_sf(datum = NA)
map_sa2_melb_dots <- map_sa2_melb_dots + labs(title = "Disadvantage across Greater Melbourne", 
                                              subtitle = "Percentage of households in the bottom IHAD quartile",
                                              fill = "Percentage",
                                              shape = "50 households")

map_sa2_melb_dots

Next, let’s try sampling the households within SA1s, but plotting onto the SA2 map. This ensures more accurate house-dot location.

#  Let's try plotting one dot for every 50 households 
# (works but very concentrated dots in inner-Melb). 

# Now let's try 100 
# (too few dots, SA1s are too small...)

# Okay let's go back to 50 but use a ceiling function
# Better, but a hist of approx_number_households shows that the average number is only 38, and the distribution is extremely skewed

# so let's try 10 with a `round` instead of a ceiling or floor. This could be too much
# and yeah, it's a lot

# next is to try 20
# still a lot

# we settle on 50 with a round feature (so SA1s with at least 25 households get a dot)

perNhh <- 50
pointCollector <- list()

ihad_sa1_melb <- ihad_sa1_melb %>% 
  mutate(approx_number_households = percentage_q1*dwellings_total/100)

melb_sa1_sf <- SA1_2016
temp <- SA1_2016@data %>% as.data.table()
melb_sa1_sf@data <- temp[ , SA1_MAIN16 := SA1_MAIN16 %>% as.character()]
melb_sa1_sf <- melb_sa1_sf[melb_sa1_sf$GCC_NAME16 == "Greater Melbourne",]

for(i in ihad_sa1_melb$sa1_main_2016){
  sa1_shape_frame <- melb_sa1_sf[melb_sa1_sf$SA1_MAIN16 == i, ]
  if(nrow(sa1_shape_frame) < 1){  next()  }
  if(is.na(ihad_sa1_melb$approx_number_households[ihad_sa1_melb$sa1_main_2016 == i])){next()}
  if(ihad_sa1_melb$approx_number_households[ihad_sa1_melb$sa1_main_2016 == i] == 0){next()}
  npts <- round(ihad_sa1_melb$approx_number_households[ihad_sa1_melb$sa1_main_2016 == i]/perNhh)
  if(npts == 0){next()}
  pts <- data.frame(spsample(x = sa1_shape_frame,
                             n = npts,
                             type = "random",
                             iter = 10)@coords) # iter(default = 4): number of times to try to place sample points in a polygon before giving up and returning NULL - this may occur when trying to hit a small and awkwardly shaped polygon in a large bounding box with a small number of points.
  pointCollector[[i]] <- pts
}

pointFrameSA1 <- bind_rows(pointCollector, .id = "sa1_main")

dim(pointFrameSA1) # just a check
head(pointFrameSA1)

Once more unto the plotting breach!

# 

map_sa2_melb_dots <- ggplot(ihad_sa2_melb)
map_sa2_melb_dots <- map_sa2_melb_dots + theme_void()
map_sa2_melb_dots <- map_sa2_melb_dots + geom_sf(aes(geometry = geometry, 
                                                     fill = percentage_q1),
                                                 lwd = 0)
map_sa2_melb_dots <- map_sa2_melb_dots + scale_fill_gradientn(colours = grattan_pal(6))
map_sa2_melb_dots <- map_sa2_melb_dots + geom_point(data = pointFrameSA1,
                                                    aes(x = x, y = y),
                                                    shape = ".",
                                                    colour = "BLACK",
                                                    alpha = 1/2)
# I don't know why, but some sampled dots have coordinates (x,y) and some have coords (coords.x1, coords.x2). I should investigate and fix, but for now I'll just plot both.
map_sa2_melb_dots <- map_sa2_melb_dots + geom_point(data = pointFrameSA1,
                                                    aes(x = coords.x1, y = coords.x2),
                                                    shape = ".",
                                                    colour = "BLACK",
                                                    alpha = 1/2)
map_sa2_melb_dots <- map_sa2_melb_dots + theme(legend.position = "right", 
                                               legend.direction = "vertical",
                                               legend.text = element_text(size = 12),
                                               axis.title = element_blank())
map_sa2_melb_dots <- map_sa2_melb_dots + coord_sf(datum = NA)
map_sa2_melb_dots <- map_sa2_melb_dots + labs(title = "Disadvantage across Greater Melbourne", 
                                              subtitle = "Percentage of households in the bottom IHAD quartile",
                                              fill = "Percentage",
                                              shape = "50 households")

map_sa2_melb_dots 

Now we’re getting fancy. We’re simulating dots for groups of households at the SA1 level to ensure locational accuracy, but plotting them onto SA3s so that we can see the underlying colour better.

# 

map_melb_sa3_dots_sa1 <- ggplot(ihad_sa3_melb)
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + theme_void()
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + geom_sf(aes(geometry = geometry, 
                                                     fill = percentage_q1),
                                                 lwd = 0)
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + scale_fill_gradientn(colours = grattan_pal(6))
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + geom_point(data = pointFrameSA1,
                                                    aes(x = x, y = y),
                                                    shape = ".",
                                                    colour = "BLACK",
                                                    alpha = 1/2)
# I don't know why, but some sampled dots have coordinates (x,y) and some have coords (coords.x1, coords.x2). I should investigate and fix, but for now I'll just plot both.
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + geom_point(data = pointFrameSA1,
                                                    aes(x = coords.x1, y = coords.x2),
                                                    shape = ".",
                                                    colour = "BLACK",
                                                    alpha = 1/2)
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + theme(legend.position = "right", 
                                               legend.direction = "vertical",
                                               legend.text = element_text(size = 12),
                                               axis.title = element_blank(),
                                               plot.title = element_text(hjust = 0.5), 
                                               plot.subtitle = element_text(hjust = 0.5),
                                               plot.caption = element_text(hjust = 0.5)
                                               )
                                              
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + coord_sf(datum = NA)
map_melb_sa3_dots_sa1 <- map_melb_sa3_dots_sa1 + labs(title = "Disadvantage across Greater Melbourne", 
                                              subtitle = "Percentage of households in the bottom IHAD quartile",
                                              fill = "Percentage",
                                              caption = "Each dot represents 50 households.\nBased on Grattan Institute analysis of ABS data.")

map_melb_sa3_dots_sa1

# grattan_save("Melbourne_Disadvantaged_Households.png", 
#              object = map_melb_sa3_dots_sa1,
#              type = "fullslide")

And lastly, I want to build an interactive version of the above map, with roads visible.

We may also want to do some spatial analysis, such as distance from nearest school or an overlap of the congestion scheme region and the number of disadvantaged households.

# Removing the SA2s with few households (such as Braeside) and getting the file ready for tmap
sf_sa2_melb_trim <- ihad_sa2_melb %>% 
  select(sa2_name_2016, everything()) %>% 
  mutate(percentage_q1 = ifelse(dwellings_total >= 100, percentage_q1, NA)) %>% 
  st_as_sf()


points_to_plot <- pointFrameSA1 %>% 
  mutate(x = ifelse(is.na(x), coords.x1, x),
         y = ifelse(is.na(y), coords.x2, y)) %>% 
  select(-c(coords.x1, coords.x2)) %>% 
   st_as_sf(coords = c('x', 'y'))

tmap_mode("view")

int_map_sa1 <- tm_shape(sf_sa2_melb_trim) + 
  tm_fill(col = "percentage_q1", 
          palette = grattan_pal(6),
          alpha = 0.5) +
  tm_borders(col = grattan_grey1, lwd = 0.5) +
  
  tm_shape(points_to_plot) +
  tm_dots(col = "black",
          size = 0.01,
          shape = 16)

int_map_sa1